knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(tidyverse)
library(here)
library(lubridate)
library(tsibble)
library(feasts)
library(slider)

1. Always look at your data

Toolik Station (LTER) meteorological data (Source: Source: Shaver, G. 2019. A multi-year DAILY file for the Toolik Field Station at Toolik Lake, AK starting 1988 to present. ver 4. Environmental Data Initiative.)

Notice that the date parsed (assumed class) as character. That limits the nice time series features we can use, so we’ll quickly convert it into a tsibble (a time series data frame) so that we can use functions in feasts and fable to explore & analyze it.

Read in data:

toolik <- read_csv(here("data", "toolikweather.csv"))

Convert the data frame to a tsibble

Go ahead and try plotting the data as imported.

ggplot(data = toolik, aes(x = date, y = mean_airtemp)) +
  geom_line()


### Booo we get a warning (only one observation per series)

Notice that it doesn’t work - because R doesn’t understand the date is a date until we tell it.

Let’s go ahead and convert it to a tsibble using the as_tsibble() function. First, we’ll need to convert the date to a date class, then convert to a tsibble:

toolik_ts <- toolik %>% 
  mutate(date = lubridate::mdy(date)) %>% 
  as_tsibble(key = NULL, index = date)

Now let’s plot it:

ggplot(data = toolik_ts, aes(x = date, y = mean_airtemp)) +
  geom_line() +
  labs(x = "Date",
       y = "Mean daily air temperature (Celsius)\n at Toolik Station")

We need to ask some big picture questions at this point, like:

  • Does there appear to be an overall trend? No.
  • Does there appear to be seasonality? Yes.
  • Does there appear to be cyclicality? Unsure.
  • Any notable outliers or additional patterns? No noted.

2. Use index_by() to aggregate time series by increments

We will use index_by() instead of group_by() to do the trick. See ?index_by() to group by a time index, then summarize() to specify what to calulate & return for each interval.

toolik_month <- toolik_ts %>% 
  index_by(yr_mo = ~yearmonth(.)) %>% 
  summarize(monthly_mean_temp = mean(mean_airtemp, na.rm = TRUE))

Now let’s take a look:

ggplot(data = toolik_month, aes(x = yr_mo, y = monthly_mean_temp)) +
  geom_line() 

### Or break it up by month: 
toolik_month %>% 
  ggplot(aes(x = year(yr_mo), y = monthly_mean_temp)) +
  geom_line() +
  facet_wrap(~month(yr_mo, label = TRUE)) +
  labs(x = "Year",
       y = "Annual mean air temperature (Celsius)",
       title = "Toolik Station mean annual air temperature",
       subtitle = "1988 - 2018",
       caption = "Source: Shaver, G. 2019. A multi-year DAILY weather file
                  for the Toolik Field Station at Toolik Lake, AK starting
                  1988 to present. ver 4. Environmental Data Initiative.")

Can you do other increments with index_by()? Absolutely! See ?index_by() for grouping options!

Let’s find the yearly average for 2000:

toolik_annual <- toolik_ts %>% 
  index_by(yearly = ~year(.)) %>% 
  summarize(annual_airtemp = mean(mean_airtemp, na.rm = TRUE))


ggplot(data = toolik_annual, aes(x = yearly, y = annual_airtemp)) +
  geom_line()

And how about a weekly average?

toolik_weekly <- toolik_ts %>% 
  index_by(weekly = ~yearweek(.)) %>% 
  summarize(weekly_airtemp = mean(mean_airtemp, na.rm = TRUE))


ggplot(data = toolik_weekly, aes(x = weekly, y = weekly_airtemp)) +
  geom_line()

3. Use filter_index() to filter by date-times!

We can use filter_index() specifically to help us filter data by time spans. See ?filter_index() for more information.

Example 1: Filter from June 2000 through October 2001

toolik_ts %>% 
  filter_index("2000-06" ~ "2001-10")
## # A tsibble: 518 x 5 [1D]
##    date       station              mean_airtemp daily_precip mean_windspeed
##    <date>     <chr>                       <dbl>        <dbl>          <dbl>
##  1 2000-06-01 Toolik Field Station          2.5          0              3.8
##  2 2000-06-02 Toolik Field Station         -3            6.6            2.3
##  3 2000-06-03 Toolik Field Station         -1.3          0.8            2.1
##  4 2000-06-04 Toolik Field Station          4.8          1.3            2  
##  5 2000-06-05 Toolik Field Station          8.5          0              3.2
##  6 2000-06-06 Toolik Field Station         12.8          0.8            4.1
##  7 2000-06-07 Toolik Field Station         12.9          0              2.8
##  8 2000-06-08 Toolik Field Station          9.5          0              3.2
##  9 2000-06-09 Toolik Field Station          9.6          1              2.7
## 10 2000-06-10 Toolik Field Station         12.3          0              3.1
## # … with 508 more rows

Example 2: Filter from April 10, 2006 to May 15, 2006

toolik_ts %>% 
  filter_index("2006-04-10" ~ "2006-05-15")
## # A tsibble: 36 x 5 [1D]
##    date       station              mean_airtemp daily_precip mean_windspeed
##    <date>     <chr>                       <dbl>        <dbl>          <dbl>
##  1 2006-04-10 Toolik Field Station        -18.5            0             NA
##  2 2006-04-11 Toolik Field Station        -13.5            0             NA
##  3 2006-04-12 Toolik Field Station        -22.5            0             NA
##  4 2006-04-13 Toolik Field Station        -24              0             NA
##  5 2006-04-14 Toolik Field Station        -24.6            0             NA
##  6 2006-04-15 Toolik Field Station        -22.2            0             NA
##  7 2006-04-16 Toolik Field Station        -19.6            0             NA
##  8 2006-04-17 Toolik Field Station        -25              0             NA
##  9 2006-04-18 Toolik Field Station        -21.9            0             NA
## 10 2006-04-19 Toolik Field Station        -26.2            0             NA
## # … with 26 more rows

Example 3: Filter from December 20, 2017 to the end of the dataset

toolik_ts %>% 
  filter_index("2017-12-20" ~ .)
## # A tsibble: 377 x 5 [1D]
##    date       station              mean_airtemp daily_precip mean_windspeed
##    <date>     <chr>                       <dbl>        <dbl>          <dbl>
##  1 2017-12-20 Toolik Field Station         -6.5          0              6.8
##  2 2017-12-21 Toolik Field Station         -3.4          0              7.7
##  3 2017-12-22 Toolik Field Station         -1.7          0              6.3
##  4 2017-12-23 Toolik Field Station         -1.4          0              6.6
##  5 2017-12-24 Toolik Field Station         -3.9          0              4.4
##  6 2017-12-25 Toolik Field Station         -7.4          0              2.7
##  7 2017-12-26 Toolik Field Station        -12.9          0.2            1.9
##  8 2017-12-27 Toolik Field Station        -12.5          0.8            1.3
##  9 2017-12-28 Toolik Field Station        -17.1          0.3            0.3
## 10 2017-12-29 Toolik Field Station        -22.6          0.1            0.9
## # … with 367 more rows

4. Explore changes in seasonality with seasonplots

Let’s look at seasonality over the years with a seasonplot, using the feasts::gg_season() function. Notice that we can still do wrangling on a tsibble like we would with a normal data frame:

toolik_ts %>% 
  filter(year(date) > 2014) %>% 
  gg_season(y = mean_airtemp)

Daily measurements seems a bit excessive to return in this visualization, right? Maybe it makes more sense to use the monthly averages in ``.

### Now a season plot: 
toolik_month %>% 
  gg_season(y = monthly_mean_temp) +
  theme_minimal() +
  labs(x = "Year",
       y = "Mean monthly air temperature (Celsius)",
       title = "Toolik Station air temperature")

5. Seasonal subseries plots

Sometimes it can be useful to explore how values within one season/month/etc. change over time (e.g. across years).

We can use gg_subseries() to explore how values change within a specified window over time.

Do you notice any trends that differ across the months?

toolik_month %>% 
  gg_subseries(monthly_mean_temp)

6. Moving averages in tsibbles

We’ll use the slider package to find moving (or rolling) averages for different window sizes.

The general structure will tend to be something like:

df %>% slide(variable, function, .before = , .after = )

Let’s make a test vector just so we can see how this works:

set.seed(2023)
test<- rnorm(100, mean = 40, sd = 10)

### Show the series based on values +2 and -2 from each observation
### Use ~.x to show the windows
w05 <- slide(test, ~.x, .before = 2, .after = 2)
# w05

### Change that to a function name to actually calculate something for each window
### Note that I add `as.numeric` here, since the outcome is otherwise a list
w05 <- as.numeric(slide(test, mean, .before = 2, .after = 2))
# w05

### Find the mean value of a window with n = 11, centered:
w11 <- as.numeric(slide(test, mean, .before = 5, .after = 5))
# w11

### Find the mean value of a window with n = 19, centered:
w19 <- as.numeric(slide(test, mean, .before = 9, .after = 9))
# w19

### Plot these together: 
combo <- data_frame(time = seq(1:100), test, w05, w11, w19) %>%
  pivot_longer(names_to = 'series', values_to = 'value', -time)


ggplot(data = combo) +
  geom_line(aes(x = time, y = value, color = series)) +
  scale_color_manual(values = c('grey70', 'red', 'orange', 'purple')) +
  theme_minimal()

Now for an example with our Toolik Station data, let’s say we want to find the average value at each observation, with a window that extends forward and backward n days from the observation:

roll_toolik_15 <- toolik_ts %>% 
  mutate(ma_15d = as.numeric(slide(toolik_ts$mean_airtemp, mean, 
                                   .before = 7, .after = 7)))

roll_toolik_61 <- toolik_ts %>% 
  mutate(ma_61d = as.numeric(slide(toolik_ts$mean_airtemp, mean, 
                                   .before = 30, .after = 30)))


ggplot() +
  geom_line(data = toolik_ts, aes(x = date, y = mean_airtemp), 
            size = 0.2, color = "gray") +
  geom_line(data = roll_toolik_15, aes(x = date, y = ma_15d), 
            color = "orange") +
  geom_line(data = roll_toolik_61, aes(x = date, y = ma_61d), 
            color = "blue") +
  theme_minimal()

7. Autocorrelation function

We’ll look at outcomes for both daily lags (yikes) and monthly lags (cool).

toolik_ts %>%
  ACF(mean_airtemp) %>%
  autoplot()

toolik_month %>% 
  ACF(monthly_mean_temp) %>% 
  autoplot()

8. Decomposition

Here we will use STL decomposition (Seasonal, Trend, and Loess) decomposition. You can read about the advantages of STL decomposition here: https://otexts.com/fpp2/stl.html.

toolik_dec <- toolik_month %>% 
  model(STL(monthly_mean_temp ~ season(window = Inf)))

components(toolik_dec) %>% autoplot()

END Part 2